---
title: "Figures and tables"
author: "Cornelius Erfort"
output: 
  pdf_document:
    dev: cairo_pdf
    toc: true
    number_sections: true
---

```{r setup, include=FALSE}
# Chunk defaults for the whole document: show the code and reformat it to a
# width of 80 characters; `python.reticulate` is switched off.
knitr::opts_chunk$set(
  echo = TRUE,
  tidy = TRUE,
  tidy.opts = list(width.cutoff = 80),
  python.reticulate = FALSE
)
# Evaluate all chunks relative to the parent of the current working directory.
knitr::opts_knit$set(root.dir = dirname(getwd()))
```

Load packages
```{r packages, message=F}
# Run the project's package-loading script. This file contains no library()
# calls of its own, so the pipe, dplyr, ggplot2, etc. used below are presumably
# attached there (script not visible here -- confirm).
source("scripts/packages.R")
```

# Load data
```{r load}
# Restore the objects stored in the .RData file (presumably including the
# ad-level data frame `ads_imgs_vids` used below -- confirm); warnings raised
# while loading are suppressed.
load("data/ads_imgs_vids.RData") %>% suppressWarnings()
```

# Main text
## General descriptives

```{r descriptives}
# Keep only ads with complete information: a party family other than
# "no family", a ParlGov ID, and non-missing only_women and faces_any_fem
ads_imgs_vids <- ads_imgs_vids %>%
  filter(
    !is.na(pg_party_family_name),
    !(pg_party_family_name %in% c("no family")),
    !is.na(only_women),
    !is.na(parlgov_id),
    !is.na(faces_any_fem)
  )
dim(ads_imgs_vids)

# Share of ads with image
round(mean(ads_imgs_vids$is_img), 2)

# Share of ads with video
round(mean(ads_imgs_vids$is_video), 2)

# Share of ads without image and video
round(mean(ads_imgs_vids$is_img == 0 & ads_imgs_vids$is_video == 0), 2)

# Subsets reused below: all image ads, and image ads with at least one face
img_ads <- filter(ads_imgs_vids, is_img == 1)
img_ads_faces <- filter(ads_imgs_vids, is_img == 1 & nfaces > 0)

# Share of images with one or more faces
round(mean(img_ads$nfaces > 0), 2)

# Share of images with exactly one face (of images with one or more)
round(mean(img_ads_faces$nfaces == 1), 2)

# Share of images with women
round(mean(img_ads_faces$faces_any_fem), 2)

# Share of images with men
round(mean(img_ads_faces$faces_any_mal), 2)

# Share of ads only seen by women
round(mean(ads_imgs_vids$only_women), 2)


```


## Figure 1b: Example ad with data from the Facebook Ad Library: Demographic audience composition
```{r example-ad}
# ads_imgs_vids %>% filter(id == 274917136782587)

# Hand-entered audience composition of the example ad, one row per
# gender x age bracket:
#   label - percentage printed next to the bar (NA = no label)
#   Share - bar length
#   dist  - position of the label along the share axis
example <- data.frame(
  Gender = rep(c("Men", "Women", "Unknown"), times = 7),
  age = rep(c("13-17", "18-24", "25-34", "35-44", "45-54", "55-64", "65+"),
            each = 3),
  label = c(NA, 5, NA,
            NA, 32, NA,
            NA, 58, NA,
            NA, 5, NA,
            NA, NA, NA,
            NA, NA, NA,
            NA, NA, NA),
  Share = c(.5, 5, .5,
            .5, 32, .5,
            .5, 58, .5,
            .5, 5, .5,
            .5, .5, .5,
            .5, .5, .5,
            .5, .5, .5),
  dist = c(3, 7, 3,
           3, 35, 3,
           3, 61, 3,
           3, 7, 3,
           3, 3, 3,
           3, 3, 3,
           3, 3, 3)
)
example$Gender <- factor(example$Gender, levels = c("Unknown", "Women", "Men"))

# One horizontal bar panel per gender; the "Unknown" rows are left out
ggplot(example %>% filter(Gender != "Unknown"), aes(y = Share, x = age)) +
  geom_bar(stat = "identity", position = "dodge",
           aes(alpha = ifelse(Gender == "Women", NA, 0.6))) +
  geom_text(aes(y = dist, label = str_c(label, "%"), family = "LM Roman 10"),
            position = position_dodge(width = 1), size = 3) +
  theme_bw() +
  theme(text = element_text(family = "LM Roman 10", size = 10),
        axis.title = element_text(size = 10),
        axis.text = element_text(size = 10),
        legend.position = c(0.8, 0.8)) +
  # NOTE(review): no layer maps `fill`, so this scale currently has no
  # visible effect
  scale_fill_manual(values = c("#F8766D", "#00BA38", "#619CFF"),
                    labels = c("Unknown", "Women", "Men"),
                    guide = guide_legend(reverse = TRUE)) +
  ylab("Share [%]") +
  # "none" is the supported way to drop a legend (FALSE is deprecated)
  guides(alpha = "none") +
  coord_flip() +
  xlab("Age") +
  facet_wrap(~Gender, ncol = 1)

ggsave("plots/figure1b_example_ad_audience_composition.pdf", device = cairo_pdf, width = 4, height = 3)
```

## Figure 2: Predicted probabilities for the tailoring to women

```{r pred-prob}
# Import data from Stata to reproduce plots in ggplot2

# Keep only the margin estimate and the confidence-interval columns
margins <- select(
  read_dta("stata-output/margins.dta"),
  c(starts_with("_marg"), starts_with("_ci"))
)
margins$targeted <- as.factor(c("other", "targeted at women"))
# Strip the underscores from the Stata variable names; the plot below refers
# to the resulting `margin`, `cilb` and `ciub`
names(margins) <- str_remove_all(names(margins), "_")

# Point estimate with confidence interval for each targeting status
ggplot(margins, aes(x = targeted)) +
  geom_point(aes(y = margin), size = 3) +
  geom_errorbar(
    aes(ymin = cilb, ymax = ciub),
    width = .075,
    size = .5,
    alpha = .5
  ) +
  ylab("P(Woman on ad)") +
  xlab("") +
  theme_bw() +
  theme(text = element_text(family = "LM Roman 10", size = 15))

ggsave("plots/figure2_marginsplot.pdf", device = cairo_pdf, width = 4*2^.5, height = 4)
```

# SM A: Descriptives

Prepare data frame for targeting and tailoring vars

```{r descr}

# Targeting: number of ads and share of ads targeted at women, per party
targeting <- ads_imgs_vids %>%
  dplyr::group_by(parlgov_id) %>%
  dplyr::summarise(
    n_ads = n(),
    targeting = mean(only_women, na.rm = TRUE)
  )

# Tailoring: per party, the share of ads showing a woman among targeted
# ("yes") minus the share among non-targeted ("no") ads. Parties lacking
# either kind of ad have no difference and are dropped.
tailoring <- ads_imgs_vids %>%
  filter(!is.na(only_women)) %>%
  dplyr::group_by(parlgov_id, only_women) %>%
  dplyr::summarise(
    any_fem_mean = mean(faces_any_fem, na.rm = TRUE),
    # not carried into the wide table below
    nfaces_mean = mean(nfaces, na.rm = TRUE)
  ) %>%
  pivot_wider(
    id_cols = c(parlgov_id),
    names_from = only_women,
    values_from = any_fem_mean
  ) %>%
  dplyr::rename(yes = `TRUE`, no = `FALSE`) %>%
  mutate(tailoring = yes - no) %>%
  filter(!is.na(tailoring))

# Merge targeting and tailoring (keep parties present in either table)
tail_targ <- merge(targeting, tailoring, by = "parlgov_id", all = TRUE)

# Party info from ParlGov: look up the party family name for each family_id
# and prefix every column with "parlgov_"
info_id <- read.csv("data/info_id.csv") %>%
  filter(table_variable == "party_family") %>%
  select(c(id, name)) %>%
  dplyr::rename(family_id = id, party_family_name = name)
parties <- read.csv("data/view_party.csv", encoding = "UTF-8") %>%
  dplyr::rename(id = party_id) %>%
  merge(info_id, by = "family_id")
names(parties) <- str_c("parlgov_", names(parties))

# Attach the party info to the targeting/tailoring table
tail_targ <- merge(tail_targ, parties, by = "parlgov_id", all.x = TRUE)

# Average audience age and share of women in the audience per party,
# excluding parties without a party family
age_gender_targeting <- ads_imgs_vids %>%
  group_by(parlgov_id) %>%
  dplyr::summarise(
    audience_avg_age = mean(audience_avg_age, na.rm = TRUE),
    fem_audience = mean(fem_audience, na.rm = TRUE),
    n_ads = n()
  ) %>%
  merge(parties, by = "parlgov_id") %>%
  filter(parlgov_family_name != "no family")
```


## Table with tailoring and targeting vars (Table A.1)

```{r tail-targ}
# Produce latex table

# One row per party: targeting and tailoring measures plus the average
# audience age (rounded to one decimal)
table_a1 <- merge(
  tail_targ,
  select(age_gender_targeting, c(parlgov_id, audience_avg_age)),
  by = "parlgov_id"
)
table_a1 <- select(
  table_a1,
  c(parlgov_id, parlgov_party_name_short, parlgov_family_name_short,
    n_ads, targeting, tailoring, audience_avg_age, parlgov_country_name)
)
table_a1 <- mutate(table_a1, audience_avg_age = round(audience_avg_age, 1))

table_a1 %>%
  stargazer(summary = FALSE, type = "latex", rownames = FALSE,
            out = "tables/table_a1_tailoring_targeting.tex")
```


## Summary statistics (Table A.2)
```{r sumstats}
# Make var factor
ads_imgs_vids$pg_party_family_name <- factor(
  ads_imgs_vids$pg_party_family_name,
  levels = c("Communist/Socialist", "Green/Ecologist", "Social democracy",
             "Christian democracy", "Liberal", "Conservative",
             "Right-wing", "Special issue", "Agrarian")
)

# Compute the rows of Table A.2 for a (grouped) data frame of ads.
#
# ads: ad-level data frame, typically grouped; one output row per group.
# Returns the summarise() result with one column per table row. The labels
# of the video block carry a trailing space so that they stay distinct from
# the image block.
#
# Image/video flags are compared with 1 before being used as a mask, so the
# subsetting is correct whether the flags are stored as logical or as 0/1.
summarise_ads <- function(ads) {
  dplyr::summarize(
    ads,
    "Number of ads" = dplyr::n() %>% comma(),
    "Unique parties" = unique(parlgov_id) %>% length,
    "Unique countries" = unique(country) %>% length,
    "Ads with text only [%]" = round(mean(!is_img & !is_video) * 100, 1),
    # Image ads
    "Ads with images [%]" = round(mean(is_img) * 100, 1),
    "  - Ads targeted at women [%]" = round(mean(fem_audience[is_img == 1] == 1) * 100, 1),
    "  - Avg. women audience [%]" = round(mean(fem_audience[is_img == 1] * 100, na.rm = TRUE), 1),
    "  - Avg. audience age [y]" = round(mean(audience_avg_age[is_img == 1], na.rm = TRUE), 1),
    "  - Any woman [%]" = mean(faces_any_fem[is_img == 1] * 100) %>% round(1),
    "  - Only women [%]" = mean(faces_all_fem[is_img == 1] * 100) %>% round(1),
    "  - With faces [%]" = round(mean(nfaces[is_img == 1] > 0, na.rm = TRUE) * 100, 1),
    "    -- Avg. number of faces" = mean(nfaces[is_img == 1 & nfaces > 0], na.rm = TRUE) %>% round(1),
    "    -- Avg. age of faces [y]" = mean(faces_age_mean[is_img == 1 & nfaces > 0], na.rm = TRUE) %>% round(1),
    # Video ads
    "Ads with videos [%]" = round(mean(is_video) * 100, 1),
    "  - Ads targeted at women [%] " = round(mean(fem_audience[is_video == 1] == 1) * 100, 1),
    "  - Avg. women audience [%] " = round(mean(fem_audience[is_video == 1] * 100, na.rm = TRUE), 1),
    "  - Avg. audience age [y] " = round(mean(audience_avg_age[is_video == 1], na.rm = TRUE), 1),
    "  - Any woman [%] " = mean(faces_any_fem[is_video == 1] * 100) %>% round(1),
    "  - Only women [%] " = mean(faces_all_fem[is_video == 1] * 100) %>% round(1),
    "  - With faces [%] " = round(mean(nfaces[is_video == 1] > 0, na.rm = TRUE) * 100, 1),
    "    - Avg. number of faces " = mean(nfaces[is_video == 1 & nfaces > 0], na.rm = TRUE) %>% round(1),
    "    - Avg. age of faces [y] " = mean(faces_age_mean[is_video == 1 & nfaces > 0], na.rm = TRUE) %>% round(1)
  )
}

# Prepare summary statistics for party families (one column per family
# after transposing); three families are left out of the table
family_stats <- ads_imgs_vids %>%
  filter(!(pg_party_family_name %in% c("Special issue", "Agrarian", "Communist/Socialist"))) %>%
  dplyr::group_by(pg_party_family_name) %>%
  summarise_ads() %>%
  t %>%
  as.data.frame()

# Prepare summary statistics for all parties, and add to other columns
sum_stats <- ads_imgs_vids %>%
  mutate(pg_party_family_name = "All") %>%
  dplyr::group_by(pg_party_family_name) %>%
  summarise_ads() %>%
  t %>%
  as.data.frame() %>%
  cbind(family_stats)

# The first row holds the group names: use it as header, then drop it
names(sum_stats) <- sum_stats[1, ]
sum_stats <- sum_stats[-1, ]
sum_stats

# Write latex table
stargazer(sum_stats,
          type = "latex", digits = 1, out = "tables/table_a2_summary_statistics.tex", summary = FALSE)


```


## Distribution of gender ratio of ad target groups (Figure A.1)

```{r gender-ratio}
# Keep parties with at least the median number of ads
filtered_data <- age_gender_targeting %>%
  filter(n_ads >= median(age_gender_targeting$n_ads))

# Define breaks, labels, and colors for party families
# (breaks, labels, shapes and colors are matched by position)
women_party_family <- c("agr", "com", "eco", "right", "spec", "chr", "con", "lib", "soc")
party_labels <- c("Agrarian", "Communist/Socialist", "Green/Ecologist",
                  "Right-wing", "Special issue", "Christian democracy",
                  "Conservative", "Liberal", "Social democracy")
party_shapes <- 1:9
party_colors <- c("dark grey", "purple", "green", "blue", "dark grey",
                  "black", "brown", "orange", "red")

# Plot: one point per party, share of women in the audience against the
# mean audience age; the vertical line marks a 50% share
ggplot(filtered_data) +
  geom_vline(xintercept = 0.5, color = "dark grey") +
  geom_point(aes(x = fem_audience, y = audience_avg_age,
                 color = parlgov_family_name_short,
                 shape = parlgov_family_name_short)) +
  ylim(27.5, 50) +
  xlim(0.25, 0.75) +
  theme_bw() +
  theme(
    legend.position = "bottom",
    text = element_text(family = "LM Roman 10")
  ) +
  scale_shape_manual(
    breaks = women_party_family,
    values = party_shapes,
    labels = party_labels,
    guide = guide_legend(nrow = 3, title = "Party family")
  ) +
  scale_color_manual(
    breaks = women_party_family,
    values = party_colors,
    labels = party_labels,
    guide = guide_legend(nrow = 3, title = "Party family")
  ) +
  xlab("Mean share of women among audience") +
  ylab("Mean age of audience") +
  # Second color scale for the party labels, without a legend of its own
  new_scale_color() +
  scale_color_manual(
    breaks = women_party_family,
    values = party_colors,
    guide = "none"
  ) +
  # NOTE(review): label.padding / label.size / label.r look like
  # geom_label_repel() arguments -- confirm they have any effect here
  geom_text_repel(
    aes(x = fem_audience,
        y = audience_avg_age,
        label = parlgov_party_name_short,
        color = parlgov_family_name_short),
    family = "LM Roman 10",
    max.overlaps = 20,
    size = 2.5,
    force_pull = 10,
    force = 0.5,
    box.padding = 0.2,
    label.padding = 0.1,
    label.size = 0.15,
    label.r = 0.075,
    alpha = 0.9
  )

# Total number of ads across all parties (before the median filter)
sum(age_gender_targeting$n_ads)

ggsave("plots/figure_a1_gender_ratio.pdf", device = cairo_pdf, width = 4*2^.5, height = 4)
```


## Tailoring and targeting by party (Figure A.2)

```{r tail-targ-fig}
# Filter data: parties with a tailoring value and a party family
filtered_tail_targ <- tail_targ %>%
  filter(!is.na(tailoring) & parlgov_family_name_short != "none")

# Mean tailoring over all parties in tail_targ (not only the plotted ones),
# drawn as the dashed reference line
mean_tailoring <- mean(tail_targ$tailoring, na.rm = TRUE)

# Define party family attributes
# (breaks, labels, shapes and colors are matched by position)
women_party_family <- c("agr", "com", "eco", "right", "spec", "chr", "con", "lib", "soc")
party_labels <- c("Agrarian", "Communist/Socialist", "Green/Ecologist",
                  "Right-wing", "Special issue", "Christian democracy",
                  "Conservative", "Liberal", "Social democracy")
party_shapes <- 1:9
party_colors <- c("dark grey", "purple", "green", "blue", "dark grey",
                  "black", "brown", "orange", "red")

# Plot: one labelled point per party, targeting against tailoring
ggplot(filtered_tail_targ) +
  geom_hline(yintercept = mean_tailoring,
             linetype = 2, color = "darkgrey") +
  geom_point(aes(x = targeting, y = tailoring, color = parlgov_family_name_short, shape = parlgov_family_name_short)) +
  # Solid line at zero: no difference between targeted and other ads
  geom_hline(yintercept = 0) +
  geom_text(aes(x = targeting,
                y = tailoring,
                label = parlgov_party_name_short,
                color = parlgov_family_name_short),
            family = "LM Roman 10",
            hjust = -0.1,
            vjust = -0.1) +
  theme_bw() +
  theme(
    legend.position = "bottom",
    text = element_text(family = "LM Roman 10")
  ) +
  scale_shape_manual(
    breaks = women_party_family,
    values = party_shapes,
    labels = party_labels,
    guide = guide_legend(nrow = 3, title = "Party Family")
  ) +
  scale_color_manual(
    breaks = women_party_family,
    values = party_colors,
    labels = party_labels,
    guide = guide_legend(nrow = 3, title = "Party Family")
  ) +
  new_scale_color() +
  scale_color_manual(
    breaks = women_party_family,
    values = party_colors,
    guide = "none"
  ) +
  xlab("Share of ads targeted at women (targeting)") +
  ylab("Difference in ad content (tailoring)")

ggsave("plots/figure_a2_tailoring_targeting_party_family.pdf", device = cairo_pdf, width = 4*2^.5, height = 4)
```

# SM C: Automated face detection
## Women on ad images by party family (Figure C.4)

```{r face-detection-fig}
# Add party info from ParlGov: look up the party family name for each
# family_id and prefix every column with "parlgov_"
info_id <- read.csv("data/info_id.csv") %>%
  filter(table_variable == "party_family") %>%
  select(c(id, name)) %>%
  dplyr::rename(family_id = id, party_family_name = name)
parties <- read.csv("data/view_party.csv", encoding = "UTF-8") %>%
  dplyr::rename(id = party_id) %>%
  merge(info_id, by = "family_id")
names(parties) <- str_c("parlgov_", names(parties))

# Image ads with at least one detected face, joined with the party info
img_face_ads <- ads_imgs_vids %>%
  filter((nfaces > 0) & is_img == 1) %>%
  merge(parties, by = "parlgov_id")

# Aggregate share of images with women by party (within party family)
women_party_family <- aggregate(
  faces_any_fem ~ parlgov_id + parlgov_party_family_name,
  data = img_face_ads,
  FUN = mean
) %>%
  filter(parlgov_party_family_name != "no family")

# Plot: distribution of the party-level shares per family, with families
# ordered by decreasing median
ggplot(women_party_family) +
  geom_boxplot(
    aes(x = reorder(parlgov_party_family_name, -faces_any_fem, FUN = median),
        y = faces_any_fem)
  ) +
  labs(x = "Party family", y = "Share of images with women") +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    text = element_text(family = "LM Roman 10"),
    axis.title = element_text(size = 10),
    axis.text = element_text(size = 10),
    plot.margin = unit(c(1, 0, 0, 1.5), "cm")
  )

ggsave("plots/figure_c4_women_ads_party_family.pdf", device = cairo_pdf, width = 4*2^.5, height = 4)

```


# SM E
## t-test (Table E.8)

```{r t-test}
# One-sample, one-tailed t-test on the party-level tailoring differences
# H1: the true mean is greater than 0 -> tailoring
t.test(tailoring$tailoring, mu = 0, alternative = "greater")
# Standard deviation of the same differences
sd(tailoring[["tailoring"]])

```


# SM F: Snapchat

```{r snapchat}
# Restore the saved Snapchat data (presumably provides `snapchat_ads`, which
# the next chunk analyses -- confirm)
load("data/snapchat_ads.RData")
```

## Analyze Snapchat data

```{r snapchat-analysis}

# Distribution of the gender-targeting field, including missing values
snapchat_ads$Gender %>% table(useNA = "always")

# Number of ads in sample
dim(snapchat_ads) # 483

# Recode missing gender targeting as an empty string so that the
# comparisons with "FEMALE" below never yield NA
snapchat_ads$Gender <- replace(snapchat_ads$Gender, is.na(snapchat_ads$Gender), "")

# Share of ads targeted
(snapchat_ads$Gender %>% table)/nrow(snapchat_ads)

# Share of women on images for ads targeted/not-targeted at women
targets_women <- snapchat_ads$Gender == "FEMALE"
mean(snapchat_ads$faces_any_fem[targets_women], na.rm = TRUE)
mean(snapchat_ads$faces_any_fem[!targets_women], na.rm = TRUE)

# Corresponding counts
table(snapchat_ads$faces_any_fem[targets_women])
table(snapchat_ads$faces_any_fem[!targets_women])

# Number of parties
length(unique(snapchat_ads$OrganizationName))


# Two-tailed z-test for difference in proportions

# Counts entered by hand: ads showing a woman (x) out of all ads (n)
x1 <- 2    # targeted ads showing a woman
n1 <- 62   # total number of targeted ads
x2 <- 66   # non-targeted ads showing a woman
n2 <- 421  # total number of non-targeted ads

# Proportions of ads showing a woman in each group
p1 <- x1 / n1
p2 <- x2 / n2

# Pooled proportion and the standard error of the difference under H0
p_pool <- (x1 + x2) / (n1 + n2)
se_pool <- sqrt(p_pool * (1 - p_pool) * (1/n1 + 1/n2))

# z-statistic and its two-tailed p-value
z <- (p1 - p2) / se_pool
p_value <- 2 * (1 - pnorm(abs(z)))

# Output the results
cat("Z-statistic:", z, "\n")
cat("P-value:", p_value, "\n")

```



# SM H: Differences in text content

## Figure H.6: Keyness plot for ES UP
```{r}
# ES, UP

# Subset to one party
subset_df <- ads_imgs_vids %>%
  filter(country %in% c("ES") & pg_party_name_short == "UP")
unique(subset_df$parlgov_id) # 2724
table(subset_df$fem_audience == 1) # 23

# Plot keyness of words: tokenise the ad texts, drop Spanish stopwords, and
# contrast ads with fem_audience == 1 (target) against the rest
ad_dfm <- dfm_remove(dfm(tokens(subset_df$ad_creative_bodies)), stopwords("es"))
keyness <- textstat_keyness(ad_dfm, target = subset_df$fem_audience == 1)
textplot_keyness(keyness, font = "LM Roman 10", n = 10)

ggsave("plots/figure_h6_keyness_ES_UP.png", width = 10, height = 4)
```

## Figure H.7: Keyness plot for ES CPC
```{r}
# ES, CPC

# Subset to one party
subset_df <- ads_imgs_vids %>%
  filter(country %in% c("ES") & pg_party_name_short == "C-PC")
unique(subset_df$parlgov_id) # 2375
table(subset_df$fem_audience == 1) # 23

# Plot keyness of words: tokenise the ad texts, drop Spanish stopwords, and
# contrast ads with fem_audience == 1 (target) against the rest
ad_dfm <- dfm_remove(dfm(tokens(subset_df$ad_creative_bodies)), stopwords("es"))
keyness <- textstat_keyness(ad_dfm, target = subset_df$fem_audience == 1)
textplot_keyness(keyness, font = "LM Roman 10", n = 10)

ggsave("plots/figure_h7_keyness_ES_C-PC.png", width = 10, height = 4)
```

## Figure H.8: Keyness plot for DE B90/Gru
```{r}
# DE, B90/Gru

# Subset to one party
subset_df <- ads_imgs_vids %>%
  filter(country %in% c("DE") & pg_party_name_short == "B90/Gru")
unique(subset_df$parlgov_id) # 772
table(subset_df$fem_audience == 1) # 114

# Plot keyness of words: tokenise the ad texts, drop German stopwords, and
# contrast ads with fem_audience == 1 (target) against the rest
ad_dfm <- dfm_remove(dfm(tokens(subset_df$ad_creative_bodies)), stopwords("de"))
keyness <- textstat_keyness(ad_dfm, target = subset_df$fem_audience == 1)
textplot_keyness(keyness, font = "LM Roman 10", n = 10)

ggsave("plots/figure_h8_keyness_DE_B90-Gru.png", width = 10, height = 4)
```

## Figure H.9: Keyness plot for GB Lib
```{r}
# GB, Lib

# Subset to one party
subset_df <- ads_imgs_vids %>%
  filter(country %in% c("GB") & pg_party_name_short == "Lib")
unique(subset_df$parlgov_id) # 659
table(subset_df$fem_audience == 1) # 1194

# Plot keyness of words: tokenise the ad texts, drop English stopwords, and
# contrast ads with fem_audience == 1 (target) against the rest
ad_dfm <- dfm_remove(dfm(tokens(subset_df$ad_creative_bodies)), stopwords("en"))
keyness <- textstat_keyness(ad_dfm, target = subset_df$fem_audience == 1)
textplot_keyness(keyness, font = "LM Roman 10", n = 10)

ggsave("plots/figure_h9_keyness_GB_Lib.png", width = 10, height = 4)
```


# SM I: Heterogeneous Effects

## Women on ad images by party family (Figure I.10)

```{r women-fig}
# Add party info from ParlGov: look up the party family name for each
# family_id and prefix every column with "parlgov_"
info_id <- read.csv("data/info_id.csv") %>%
  filter(table_variable == "party_family") %>%
  select(c(id, name)) %>%
  dplyr::rename(family_id = id, party_family_name = name)
parties <- read.csv("data/view_party.csv", encoding = "UTF-8") %>%
  dplyr::rename(id = party_id) %>%
  merge(info_id, by = "family_id")
names(parties) <- str_c("parlgov_", names(parties))

# Image ads with at least one detected face, joined with the party info
img_face_ads <- ads_imgs_vids %>%
  filter((nfaces > 0) & is_img == 1) %>%
  merge(parties, by = "parlgov_id")

# Aggregate share of images with women by party (within party family)
women_party_family <- aggregate(
  faces_any_fem ~ parlgov_id + parlgov_party_family_name,
  data = img_face_ads,
  FUN = mean
) %>%
  filter(parlgov_party_family_name != "no family")

# Plot: distribution of the party-level shares per family, with families
# ordered by decreasing median
ggplot(women_party_family) +
  geom_boxplot(
    aes(x = reorder(parlgov_party_family_name, -faces_any_fem, FUN = median),
        y = faces_any_fem)
  ) +
  labs(x = "Party family", y = "Share of images with women") +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    text = element_text(family = "LM Roman 10"),
    axis.title = element_text(size = 10),
    axis.text = element_text(size = 10),
    plot.margin = unit(c(1, 0, 0, 1.5), "cm")
  )

ggsave("plots/figure_i10_women_ads_party_family.pdf", device = cairo_pdf, width = 4*2^.5, height = 4)

```

# Load data for marginsplot
```{r}
# Import data from Stata and reproduce plots in ggplot2
# Keep only the margin estimate and the confidence-interval columns
margins <- select(
  read_dta("stata-output/margins.dta"),
  c(starts_with("_marg"), starts_with("_ci"))
)
margins$targeted <- as.factor(c("other", "targeted at women"))
# Strip the underscores from the Stata variable names
names(margins) <- str_remove_all(names(margins), "_")
```

Prepare data for all party families
```{r}
# The margins loaded in the previous chunk become the "All parties" rows:
# keep the first three columns (estimate and confidence bounds)
all <- margins[, 1:3]
all$family <- "All parties"

# Load files, one each per party family: .dta files whose path contains an
# upper-case letter (e.g. "Communist_Socialist.dta")
family_files <- list.files("stata-output", full.names = TRUE) %>%
  str_subset("[:upper:].*\\.dta$")
stopifnot(length(family_files) > 0)

# Read the margins of one party family; the family name is taken from the
# file name (extension removed, first underscore replaced by a space)
read_family_margins <- function(file) {
  read_dta(file) %>%
    select(c(starts_with("_marg"), starts_with("_ci"))) %>%
    mutate(family = basename(file) %>%
             str_remove("\\.dta$") %>%
             str_replace("_", " "))
}

# Stack all files in one step instead of growing the data frame inside a
# loop; rev() puts the last-listed file first
margins <- do.call(rbind, lapply(rev(family_files), read_family_margins))

# Restore the slash that cannot be part of a file name
margins$family[margins$family == "Communist Socialist"] <- "Communist/Socialist"
margins$family[margins$family == "Green Ecologist"] <- "Green/Ecologist"

# Give the "All parties" rows the same (Stata) column names so they can be
# stacked with the family rows
names(all) <- names(margins)

# Add the data for all party families (combined)
margins <- rbind(all, margins)

# Order the families for plotting; keep the full name and add a short code
margins$family <- factor(
  margins$family,
  levels = c("All parties", "Communist/Socialist", "Green/Ecologist",
             "Social democracy", "Christian democracy", "Liberal",
             "Conservative")
)
margins$family_full <- margins$family
margins <- margins %>%
  mutate(family = recode(family,
                         "All parties" = "all",
                         "Communist/Socialist" = "com",
                         "Green/Ecologist" = "eco",
                         "Social democracy" = "soc",
                         "Christian democracy" = "chr",
                         "Liberal" = "lib",
                         "Conservative" = "con"))

# Rows are assumed to come in (not targeted, targeted) pairs per family
margins$targeted <- c("no", "yes") %>% rep(nrow(margins)/2) %>% as.factor()
names(margins) <- str_remove_all(names(margins), "_")
```

## Marginsplot for all party families (Figure I.11)
```{r}
# Plot: predicted probabilities with confidence intervals for non-targeted
# vs. targeted ads, one panel and one color per party family

# Short codes, colors and legend labels, matched by position
family_breaks <- c("all", "com", "eco", "soc", "chr", "lib", "con")
family_colors <- c("black", "purple", "green", "red", "black", "dark orange", "brown")
family_labels <- c("All parties (all)", "Communist/Socialist (com)",
                   "Green/Ecologist (eco)", "Social democracy (soc)",
                   "Christian democracy (chr)", "Liberal (lib)",
                   "Conservative (con)")

ggplot(margins, aes(x = targeted, group = family)) +
  geom_point(aes(y = margin, color = family), size = 3) +
  geom_errorbar(
    aes(ymin = cilb, ymax = ciub, color = family),
    width = .15,
    size = .5,
    alpha = .5
  ) +
  geom_line(aes(y = margin, color = family), alpha = .5) +
  ylab("P(Woman on ad)") +
  xlab("Ad targeted at women") +
  theme_bw() +
  theme(
    text = element_text(family = "LM Roman 10", size = 18),
    legend.position = "bottom",
    legend.title = element_blank()
  ) +
  scale_color_manual(
    values = family_colors,
    breaks = family_breaks,
    labels = family_labels
  ) +
  guides(color = guide_legend(nrow = 4)) +
  facet_grid(~family, space = "free_x")

ggsave("plots/figure_i11_marginsplot_family.pdf", device = cairo_pdf, width = 5*2^.5, height = 5)


```

